
import util
import guoziyaowen
import zonghebaodao1
import dongshizhang
import getHTMLToJson

import sys
import os

localPath = r"D:\aGetDataFrom111111"
# The log file lives inside the output directory, so the directory has to
# exist before the file is opened below; otherwise open() raises
# FileNotFoundError on a first run.
os.makedirs(localPath, exist_ok=True)
# Path of the log file
log_file_path = os.path.join(localPath, "crawl_output.log")

# Redirect stdout to the log file
sys.stdout = open(log_file_path, 'w', encoding='utf-8')



def main():
    """Run the currently enabled crawl job.

    Ensures the output directory ``localPath`` exists, then fetches pages
    1 through 14 of the "党群工作" listing with ``util.fetch_page_content``
    and hands every non-empty response to
    ``zonghebaodao1.parse_content_zhbd`` together with ``localPath`` and
    the section label.

    The commented-out sections below are the other crawl jobs; they are
    kept (with their original runtime strings) so they can be re-enabled
    by uncommenting.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(localPath, exist_ok=True)

    # url = f"http://www.cmst.com.cn/cctgroup/xwzx/gzdt4/599518/index.html"
    # html_content = fetch_page_content(url)
    # if html_content:
    #     parsed_data = parse_content_gzyw_1(html_content,localPath)

    # # State-owned assets news
    # for i in range(1, 45):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652606/ced1eee7-{i}.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = guoziyaowen.parse_content_gzyw(html_content,localPath)

    # # News center - general reports
    # url = f"http://www.cmstd.com.cn/cmstd/xwzx6/zbxxfb/index.html"
    # html_content = util.fetch_page_content(url)
    # if html_content:
    #     parsed_data = zonghebaodao1.parse_content_zhbd(html_content, localPath, "中储-招标信息发布")
    # for i in range(0, 3):
    #     url = f"http://www.cmstd.com.cn/cmstd/xwzx6/728376/fec89b85-{i}.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = zonghebaodao1.parse_content_zhbd(html_content,localPath,"改革专栏-中储")

    # News center - industry news
    # for i in range(1, 9):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652608/652612/97a48017-{i}.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = zonghebaodao1.parse_content_zhbd(html_content,localPath,"行业咨询")

    # # # News center - business updates
    # for i in range(1, 20):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652608/652614/96ae9805-{i}.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = zonghebaodao1.parse_content_zhbd(html_content, localPath, "经营动态")

    # Party and mass work (the only job currently enabled)
    for page_no in range(1, 15):
        url = f"http://www.cmstd.com.cn/cmstd/dqgz49/11b3cb30-{page_no}.html"
        html_content = util.fetch_page_content(url)
        if html_content:
            # The parser's return value is not used here.
            zonghebaodao1.parse_content_zhbd(html_content, localPath, "党群工作-中储")

    # # # Party and mass work - 20th plenary session
    # for i in range(1, 3):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652650/773725/4a818b9b-{i}.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = zonghebaodao1.parse_content_zhbd(html_content, localPath, "党群工作")

    # # Join CMST (careers)
    # for i in range(1, 2):
    #     url = f"http://www.cmst.com.cn/zgwzcy/rczp3/jrzc/index.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = zonghebaodao1.parse_content_zhbd(html_content, localPath, "加入中储")

    # # Company network - introductions of the individual companies
    # start_id = 652563
    # end_id = 652581
    #
    # for page_id in range(start_id, end_id + 1, 2):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652550/652561/{page_id}/index.html"
    #     print(f"正在抓取: {url}")
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = dongshizhang.parse_content_dszzc(html_content, localPath)

    # # Chairman's message
    # for i in range(1, 2):
    #     url = f"http://www.cmst.com.cn/zgwzcy/652678/index.html"
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = dongshizhang.parse_content_dszzc(html_content, localPath)

    # Call the function that processes the saved HTML files
    # news_data = getHTMLToJson.process_html_files(r"D:\aGetDataFrom111111\02国资要闻")
    #
    # # Print the generated newsData
    # print(news_data)
    # print("完成")

    # # Materials group - English website
    # start_id = 652869
    # end_id = 652879
    #
    # for page_id in range(start_id, end_id + 1, 2):
    #     url = f"http://www.cmst.com.cn/ezgwzcy/652867/{page_id}/index.html"
    #     print(f"正在抓取: {url}")
    #     html_content = util.fetch_page_content(url)
    #     if html_content:
    #         parsed_data = dongshizhang.parse_content_English(html_content, localPath)

    # html_content = util.fetch_page_content('http://www.cmstd.com.cn/cmstd/xwzx6/zhbd/666328/index.html')
    # if html_content:
    #     parsed_data = zonghebaodao1.parse_content_zhbd_1(html_content, r"D:\aGetDataFrom111111\中储发展股份有限公司网站")




try:
    if __name__ == "__main__":
        main()
finally:
    # Restore the default stdout and close the log file. Doing this in a
    # finally clause guarantees the log is flushed and stdout is restored
    # even when main() raises; it still runs on plain import, as before.
    sys.stdout.close()
    sys.stdout = sys.__stdout__
